import sys
import os

# Program entry point.
# Extend the module search path before the project imports below are executed.
# NOTE(review): presumably this directory holds the `bbdw` and `DWGI` packages
# imported next — confirm; the absolute path is machine-specific.
sys.path.append(r'F:\mypython\final_subject')

from bbdw.split_remove_stop import filter_file
from bbdw.tf_idf import compute_tf_idf
from bbdw.build_graph import build
from bbdw.add_weight_exclude_doc import extract_sub_graph
from bbdw.remove_single_words import remove
from DWGI.exe import train
from DWGI.predict import predict_subject
from DWGI.load_subject import load_res


def start(file):
    """Run the complete pipeline, stage by stage, for one corpus file.

    The stages run strictly in sequence: preprocessing, TF-IDF computation,
    graph construction, sub-graph extraction, single-node clean-up, training,
    subject-word prediction and finally loading the results.

    Args:
        file: The raw corpus file handed to ``filter_file``.
    """
    # The TF-IDF step and the graph builder are both given this same path.
    processed_path = r'F:\mypython\final_subject\bbdw\new_file.txt'

    # Preprocess the text.
    filter_file(file)

    # Compute TF-IDF.
    compute_tf_idf(processed_path)

    # Build the document-word co-occurrence graph (document nodes included);
    # this produces the contents of the `out` folder.
    build(processed_path)

    # The remaining stages take no arguments and are run in exactly this order.
    remaining_stages = (
        # Extract the word co-occurrence sub-graph; produces the contents of
        # the `exclude_doc_out` folder.
        extract_sub_graph,
        # Drop sub-graphs made up of a single node.
        # `exclude_doc_single_word_out` corresponds to the graph without
        # document nodes, `include_doc_single_word_out` to the graph with
        # document nodes; the latter is used to train the graph neural network.
        remove,
        # Train.
        train,
        # Predict the subject words.
        predict_subject,
        # Inspect the results.
        load_res,
    )
    for stage in remaining_stages:
        stage()


if __name__ == "__main__":
    # When executed as a script, run the pipeline on this fixed corpus file.
    start(file=r'F:\KG\corpus_files\external.txt')


